{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 6 Pandas的函数应用"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1)\n",
    "print(df)\n",
    "\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:17:20.849786Z",
     "start_time": "2025-02-26T06:17:19.320968Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -1.225209 -0.943073 -1.001662 -0.812092\n",
      "1 -2.437980 -1.450956 -1.954682 -1.918512\n",
      "2 -2.160614  0.099376 -1.517648 -1.501357\n",
      "3  0.092978 -0.122510 -1.516423 -1.217934\n",
      "4 -0.924922 -0.585473 -1.264876 -2.072640\n",
      "          0         1         2         3\n",
      "0  1.225209  0.943073  1.001662  0.812092\n",
      "1  2.437980  1.450956  1.954682  1.918512\n",
      "2  2.160614  0.099376  1.517648  1.501357\n",
      "3  0.092978  0.122510  1.516423  1.217934\n",
      "4  0.924922  0.585473  1.264876  2.072640\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0\n",
    "print(df.apply(lambda x : x.max(),axis=0))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:18:19.290647Z",
     "start_time": "2025-02-26T06:18:19.287052Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.092978\n",
      "1    0.099376\n",
      "2   -1.001662\n",
      "3   -0.812092\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "source": [
    "#apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:19:36.040687Z",
     "start_time": "2025-02-26T06:19:36.037086Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.812092\n",
      "1   -1.450956\n",
      "2    0.099376\n",
      "3    0.092978\n",
      "4   -0.585473\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "# 使用applymap应用到每个数据\n",
    "print(df.map(lambda x : '%.2f' % x))\n",
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:20:50.179761Z",
     "start_time": "2025-02-26T06:20:50.175143Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2      3\n",
      "0  -1.23  -0.94  -1.00  -0.81\n",
      "1  -2.44  -1.45  -1.95  -1.92\n",
      "2  -2.16   0.10  -1.52  -1.50\n",
      "3   0.09  -0.12  -1.52  -1.22\n",
      "4  -0.92  -0.59  -1.26  -2.07\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    float64\n",
       "1    float64\n",
       "2    float64\n",
       "3    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:21:16.632485Z",
     "start_time": "2025-02-26T06:21:16.629462Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "markdown",
   "source": "## 6.4 索引排序（放到最后讲解）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 4 3 1 1]\n",
      "2    10\n",
      "0    11\n",
      "1    12\n",
      "0    13\n",
      "0    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    11\n",
      "0    13\n",
      "0    14\n",
      "1    12\n",
      "2    10\n",
      "dtype: int64\n",
      "2    10\n",
      "0    11\n",
      "1    12\n",
      "0    13\n",
      "0    14\n",
      "dtype: int64\n",
      "2    10\n",
      "0    11\n",
      "1    12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": "2    10\n0    11\n1    12\ndtype: int64"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) #索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])\n",
    "s4[0:3]  #默认用的位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:29:09.302420700Z",
     "start_time": "2024-04-11T03:29:09.266437200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# s4.loc[1:2] #loc索引值唯一时可以切片"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         3         2         3         0\n",
      "0 -0.137610 -0.858287 -0.486837 -0.018233  1.223872\n",
      "1  0.026397 -0.063860  0.085134  0.232169  0.426953\n",
      "2 -1.281888 -0.910263  1.482847 -0.947013  0.401932\n",
      "3  0.603027  1.002181  1.451099  0.564141  0.076904\n",
      "1  0.001383 -1.250364 -1.222472 -1.596742  0.732299\n",
      "          0         3         2         3         0\n",
      "3  0.603027  1.002181  1.451099  0.564141  0.076904\n",
      "2 -1.281888 -0.910263  1.482847 -0.947013  0.401932\n",
      "1  0.026397 -0.063860  0.085134  0.232169  0.426953\n",
      "1  0.001383 -1.250364 -1.222472 -1.596742  0.732299\n",
      "0 -0.137610 -0.858287 -0.486837 -0.018233  1.223872\n"
     ]
    }
   ],
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "#轴零是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:31:24.485192800Z",
     "start_time": "2024-04-11T03:31:24.431222800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         0         2         3         3\n",
      "0 -0.137610  1.223872 -0.486837 -0.858287 -0.018233\n",
      "1  0.026397  0.426953  0.085134 -0.063860  0.232169\n",
      "2 -1.281888  0.401932  1.482847 -0.910263 -0.947013\n",
      "3  0.603027  0.076904  1.451099  1.002181  0.564141\n",
      "1  0.001383  0.732299 -1.222472 -1.250364 -1.596742\n"
     ]
    }
   ],
   "source": [
    "#轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:32:04.675507Z",
     "start_time": "2024-04-11T03:32:04.664513700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": "# 6.5 按值排序（机器学习，深度学习不重要，数据分析才需要，直接跳过）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 65  21  58  61]\n",
      " [ 88  25  35  86]\n",
      " [ 65  28  63   5]\n",
      " [ 64  45  95  23]\n",
      " [  0  92  57 100]\n",
      " [ 52  57  89  24]]\n",
      "--------------------------------------------------\n",
      "    0   1   2    3\n",
      "0  65  21  58   61\n",
      "1  88  25  35   86\n",
      "2  65  28  63    5\n",
      "3  64  45  95   23\n",
      "4   0  92  57  100\n",
      "5  52  57  89   24\n",
      "--------------------------------------------------\n",
      "    0   1   2    3\n",
      "4   0  92  57  100\n",
      "1  88  25  35   86\n",
      "0  65  21  58   61\n",
      "5  52  57  89   24\n",
      "3  64  45  95   23\n",
      "2  65  28  63    5\n"
     ]
    }
   ],
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "print(df4.values) #查看数据,ndarray\n",
    "print('-'*50)\n",
    "print(df4)\n",
    "print('-'*50)\n",
    "#按轴零排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:33:41.459835200Z",
     "start_time": "2024-04-11T03:33:41.427843Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    2   0   1    3\n",
      "0  58  65  21   61\n",
      "1  35  88  25   86\n",
      "2  63  65  28    5\n",
      "3  95  64  45   23\n",
      "4  57   0  92  100\n",
      "5  89  52  57   24\n"
     ]
    }
   ],
   "source": [
    "#按轴1排序，by后行索引名，交换的是列\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:36:15.358985700Z",
     "start_time": "2024-04-11T03:36:15.309976600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 6.6 处理缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],\n",
    "                       [np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:25:02.370875Z",
     "start_time": "2025-02-26T06:25:02.358791Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0        1         2\n",
      "0 -0.913741  0.19534  0.608869\n",
      "1  1.000000  2.00000       NaN\n",
      "2       NaN  4.00000       NaN\n",
      "3  1.000000  2.00000  3.000000\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "df_data.iloc[2,0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:27:17.043719Z",
     "start_time": "2025-02-26T06:27:17.039865Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "source": [
    "#isnull来判断是否有空的数据\n",
    "print(df_data.isnull())\n",
    "#帮我算一下每一列的缺失率\n",
    "print(df_data.isnull().sum()/df_data.shape[0])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:28:01.073752Z",
     "start_time": "2025-02-26T06:28:01.070146Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n",
      "0    0.25\n",
      "1    0.00\n",
      "2    0.50\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 删除缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第一列来删除,第一列有空值就删除对应的行\n",
    "print(df_data.dropna(subset=[0]))\n",
    "# df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:30:58.358253Z",
     "start_time": "2025-02-26T06:30:58.353780Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0        1         2\n",
      "0 -0.913741  0.19534  0.608869\n",
      "1  1.000000  2.00000       NaN\n",
      "3  1.000000  2.00000  3.000000\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:31:13.656956Z",
     "start_time": "2025-02-26T06:31:13.652747Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0        1         2\n",
       "0 -0.913741  0.19534  0.608869\n",
       "1  1.000000  2.00000       NaN\n",
       "2       NaN  4.00000       NaN\n",
       "3  1.000000  2.00000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.913741</td>\n",
       "      <td>0.19534</td>\n",
       "      <td>0.608869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "#用的不多，某个特征缺失太多时\n",
    "print(df_data.dropna(axis=1))  #某列由nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:31:10.480681Z",
     "start_time": "2025-02-26T06:31:10.476591Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         1\n",
      "0  0.19534\n",
      "1  2.00000\n",
      "2  4.00000\n",
      "3  2.00000\n"
     ]
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:32:19.351046Z",
     "start_time": "2025-02-26T06:32:19.346508Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0        1         2\n",
       "0 -0.913741  0.19534  0.608869\n",
       "1  1.000000  2.00000       NaN\n",
       "2       NaN  4.00000       NaN\n",
       "3  1.000000  2.00000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.913741</td>\n",
       "      <td>0.19534</td>\n",
       "      <td>0.608869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 填充缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#给零列的空值填为-100，按特征（按列）去填充  \n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:32:58.499087Z",
     "start_time": "2025-02-26T06:32:58.494087Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     -0.913741\n",
      "1      1.000000\n",
      "2   -100.000000\n",
      "3      1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "          0        1         2\n",
       "0 -0.913741  0.19534  0.608869\n",
       "1  1.000000  2.00000       NaN\n",
       "2       NaN  4.00000       NaN\n",
       "3  1.000000  2.00000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.913741</td>\n",
       "      <td>0.19534</td>\n",
       "      <td>0.608869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.77195\n",
      "1    1.00000\n",
      "2        NaN\n",
      "3    1.00000\n",
      "Name: 0, dtype: float64\n",
      "0    0.039062\n",
      "1    2.000000\n",
      "2    4.000000\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0   -0.585232\n",
      "1         NaN\n",
      "2         NaN\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:43:15.684081800Z",
     "start_time": "2024-04-11T06:43:15.663094600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:33:46.635559Z",
     "start_time": "2025-02-26T06:33:46.631455Z"
    }
   },
   "cell_type": "code",
   "source": "df_data",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0        1         2\n",
       "0 -0.913741  0.19534  0.608869\n",
       "1  1.000000  2.00000       NaN\n",
       "2       NaN  4.00000       NaN\n",
       "3  1.000000  2.00000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.913741</td>\n",
       "      <td>0.19534</td>\n",
       "      <td>0.608869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:33:57.061318Z",
     "start_time": "2025-02-26T06:33:57.058158Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.dropna(axis=1,inplace=True)",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:34:05.793816Z",
     "start_time": "2025-02-26T06:34:05.790106Z"
    }
   },
   "cell_type": "code",
   "source": "df_data",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         1\n",
       "0  0.19534\n",
       "1  2.00000\n",
       "2  4.00000\n",
       "3  2.00000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.19534</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
